package org.carwler.util;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.carwler.pojo.HouseInfo;

/**
 * Regex helpers that extract title and link data from HTML-like text into
 * {@link HouseInfo} objects.
 */
public class RegexUtil {

    /**
     * Matches one {@code <div class="title">} element whose content begins
     * (after at least one whitespace character) with an {@code <a} tag.
     * No DOTALL flag is used, so the element content must not contain a line
     * terminator for the lazy {@code .+?} to reach the closing {@code </div>}.
     */
    private static final Pattern TITLE_DIV_PATTERN = Pattern
            .compile("<div class=\"title\">\\s{1,}<a(.+?)</div>");

    /**
     * Matches the anchor text: a {@code >} followed by one or more characters
     * in the range U+0391 through U+FFE5, then at least one further character,
     * up to the first {@code </a>}.
     */
    private static final Pattern TITLE_TEXT_PATTERN = Pattern
            .compile(">[\u0391-\uFFE5]+.+?</a>");

    /** Matches a URL-like token: optional http/https scheme, dotted host, optional path. */
    private static final Pattern LINK_PATTERN = Pattern
            .compile("((http|https)://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?");

    /** Closing tag that terminates every {@link #TITLE_TEXT_PATTERN} match. */
    private static final String ANCHOR_CLOSE = "</a>";

    /**
     * Scans {@code targetStr} for title blocks and appends one {@link HouseInfo}
     * per block to {@code houseList}.
     *
     * <p>For each block the title is set only when the anchor-text pattern
     * matches, and the link is set only when a URL-like token is found (the
     * first such token in the block is used). The {@code HouseInfo} is appended
     * either way, so entries may have an unset title and/or link.
     *
     * <p>NOTE(review): this method is private and nothing in this class calls
     * it - confirm whether it is meant to be exposed to callers.
     *
     * @param houseList list that receives the extracted entries; mutated in place
     * @param targetStr text to scan
     */
    private void regexTitle(List<HouseInfo> houseList, String targetStr) {
        // Patterns are immutable and are compiled once as class constants
        // instead of on every loop iteration; only the matchers are per-use.
        Matcher blockMatcher = TITLE_DIV_PATTERN.matcher(targetStr);
        while (blockMatcher.find()) {
            String block = blockMatcher.group();
            HouseInfo info = new HouseInfo();

            // Set the title: strip the leading '>' and the trailing "</a>".
            Matcher titleMatcher = TITLE_TEXT_PATTERN.matcher(block);
            if (titleMatcher.find()) {
                String matched = titleMatcher.group();
                info.setTitle(matched.substring(1,
                        matched.length() - ANCHOR_CLOSE.length()));
            }

            // Set the external link: the first URL-like token in the block.
            Matcher linkMatcher = LINK_PATTERN.matcher(block);
            if (linkMatcher.find()) {
                info.setLink(linkMatcher.group());
            }

            houseList.add(info);
        }
    }
}
